import pandas as pd
from pandas import read_sql_table
import numpy as np
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from matplotlib import pyplot
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd
from pandas_profiling import ProfileReport
SQLALCHEMY_DATABASE_URI = 'postgresql://{db_user}:{db_password}@{db_host}:5432/{db_name}'.format(
db_host = '34.69.215.94',
db_name = 'snpiao_data',
db_password = 'ucamnafinki',
db_user = 'snpiao_data',
)
energy = read_sql_table('tsdb_fronius_energy',SQLALCHEMY_DATABASE_URI)
cams = read_sql_table('tsdb_cams', SQLALCHEMY_DATABASE_URI)
merra = read_sql_table('tsdb_merra', SQLALCHEMY_DATABASE_URI)
energy = energy.set_index('datetime').sort_index()
cams = cams.set_index('datetime').sort_index()
merra = merra.set_index('datetime').sort_index()
energy = energy.resample('1H').agg('mean')
merged = cams.merge(energy, on='datetime')
merged = merged.merge(merra, on='datetime')
merged.fillna(method='ffill', inplace=True)
merged['hour'] = merged.index.hour
merged['dayofweek'] = merged.index.dayofweek
merged['consumption'] = merged['FromGenToConsumer'] + merged['FromGridToConsumer']
pyplot.plot(merged['consumption'])
q = merged['consumption'].quantile(0.99)
merged = merged[merged['consumption'] < q]
pyplot.plot(merged['consumption'])
pyplot.show()
merged
profile = ProfileReport(merged, title="Pandas Profiling Report")
profile
X = pd.DataFrame(data=merged, columns=['gh', 'csky_ghi', 'tdry', 'wspd', 'wdir' 'pres_hpa', 'rhum', 'snowd_m', 'rainfall'])
y = pd.DataFrame(data=merged, columns=['consumption'])
to_csv = pd.DataFrame(data = merged, columns=['gh', 'csky_ghi', 'tdry', 'wspd', 'pres_hpa', 'rhum', 'snowd_m', 'rainfall', 'consumption'])
y.plot()
to_csv